Assume we have a continuous, two-piece linear function $f(x) = a_l x + b$ for $x < 0$ and $f(x) = a_r x + b$ for $x \geq 0$. Can we detect the "jump in slope" at $x = 0$ from a noisy dataset? Here, noisy means that we add i.i.d. $\mathcal{N}(0, \sigma^2)$-distributed Gaussian noise to each $f(x_i)$.
In [78]:
from tools.plot import plot

# Ground-truth parameters: slopes left/right of the break, value at the
# break, noise level, and sample size.
a_l, a_r = 1., -1.
b = 4
sigma = 2.
n_samples = 100

def twolin(x, xc, a, b, c):
    """Continuous two-segment linear function: slope a for x < xc, slope b
    for x >= xc, value c at the break point xc."""
    left = (x < xc) * a * (x - xc)
    right = (x >= xc) * b * (x - xc)
    return left + right + c

f = lambda x: twolin(x, 0, a_l, a_r, b)

# Draw sample positions uniformly on [-3, 3] and add Gaussian noise to f.
xs = np.random.uniform(low=-3, high=3, size=n_samples)
ys = f(xs) + np.random.normal(scale=sigma, size=n_samples)

plot(f, (-3, 3))
pl.plot(xs, ys, lw=0, marker='x')
Out[78]:
In [79]:
def error(f):
    """Mean squared error of model f on the global sample (xs, ys)."""
    residuals = f(xs) - ys
    return np.mean(residuals ** 2)
In [80]:
def get_split_fit(xc, x=None, y=None):
    """Closed-form least-squares fit of a continuous two-segment linear
    model with the break point fixed at xc.

    The model is twolin(x, xc, a, b, c): slope a for x < xc, slope b for
    x >= xc, shared value c at the break — linear in (a, b, c), so the
    normal equations can be solved directly.

    Parameters
    ----------
    xc : float
        Location of the slope break.
    x, y : array_like, optional
        Sample positions and observations; default to the module-level
        xs, ys so existing call sites keep working.

    Returns
    -------
    tuple (a, b, c) of fitted parameters, or False when either side of
    the split has fewer than 2 points (a slope is then undetermined).
    """
    if x is None:
        x = xs
    if y is None:
        y = ys
    sel = x < xc
    n_left = int(np.sum(sel))
    # Need at least two points on each side to determine both slopes.
    if n_left < 2 or n_left > len(sel) - 2:
        return False
    n = n_left / len(sel)  # fraction of samples left of the break
    m = np.mean
    # Per-segment pieces of the normal equations in the shifted coordinate
    # u = x - xc.  num/den assemble the shared intercept c; slo yields the
    # segment slope once c is known (late-bound closure over c).
    num = lambda x, y: m(y) - m(x - xc) * m((x - xc) * y) / m((x - xc)**2)
    den = lambda x: m((x - xc))**2 / m((x - xc)**2)
    slo = lambda x, y: (m((x - xc) * y) - c * m(x - xc)) / m((x - xc)**2)
    # NOTE: the original indexed with x[-sel]; negating a boolean array is
    # a TypeError on modern numpy — use the bitwise complement ~sel.
    c = (n * num(x[sel], y[sel]) + (1 - n) * num(x[~sel], y[~sel])) / \
        (1 - n * den(x[sel]) - (1 - n) * den(x[~sel]))
    a = slo(x[sel], y[sel])
    b = slo(x[~sel], y[~sel])
    return a, b, c
In [81]:
# Scan candidate break points and plot the fit error at each one; the true
# break should show up as the minimum.
for xc in np.linspace(-2, 2, 50):
    vals = get_split_fit(xc)
    # get_split_fit returns False when one side of the split has fewer than
    # two points; skip such candidates instead of crashing on `*vals`.
    if vals is False:
        continue
    pl.scatter([xc], error(lambda x: twolin(x, xc, *vals)))
In [83]:
# Refit with the break point placed at the true location and overlay the
# fitted model on the ground truth and the noisy samples.
xc = 0
vals = get_split_fit(xc)
fitted = lambda x: twolin(x, xc, *vals)
plot(f, (-3, 3))
pl.plot(xs, ys, lw=0, marker='x')
plot(fitted, (-3, 3))
print(vals)
Now, can we do the same thing in a Bayesian way?
In [2]:
import pymc as pm
In [ ]: